//! This script takes care of the following tasks:
//!
//! - generate `src/Config.zig`
//! - generate `schema.json`
//! - generate metadata about Zig's builtins (uses `src/tools/langref.html.in`)
//! - generate ZLS configuration options for vscode-zig's package.json

const std = @import("std");

const ConfigOption = struct {
    /// Name of the config option.
    name: []const u8,
    /// Human readable description (used in doc comments & schema.json).
    description: []const u8,
    /// Zig type in string form, e.g. "u32", "[]const u8", "?usize".
    type: []const u8,
    /// Member names when the option is an enum.
    /// If this is set, `type` should be "enum".
    @"enum": ?[]const []const u8 = null,
    /// Used in Config.zig as the default initializer.
    default: std.json.Value,

    /// Maps the Zig type string (ignoring one leading `?`) to the type name
    /// used in the JSON outputs. Returns `error.UnsupportedType` for any type
    /// string that is not listed in the table below.
    fn getTypescriptType(self: ConfigOption) error{UnsupportedType}![]const u8 {
        std.debug.assert(self.type.len != 0);
        const base_type = if (self.type[0] == '?') self.type[1..] else self.type;

        const Mapping = struct { zig: []const u8, json: []const u8 };
        const mappings: []const Mapping = &.{
            .{ .zig = "[]const []const u8", .json = "array" },
            .{ .zig = "[]const u8", .json = "string" },
            .{ .zig = "bool", .json = "boolean" },
            .{ .zig = "usize", .json = "integer" },
            .{ .zig = "enum", .json = "string" },
        };
        for (mappings) |mapping| {
            if (std.mem.eql(u8, base_type, mapping.zig)) return mapping.json;
        }
        return error.UnsupportedType;
    }

    /// Writes the Zig type of this option: an inline `enum { ... }` declaration
    /// when enum members are present, otherwise the `type` string verbatim.
    fn formatZigType(config: ConfigOption, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        const members = config.@"enum" orelse {
            try writer.writeAll(config.type);
            return;
        };

        try writer.writeAll("enum {\n");
        for (members) |member| {
            try writer.writeAll("    ");
            try writer.writeAll(member);
            try writer.writeAll(",\n");
        }
        std.debug.assert(members.len > 1);
        try writer.writeByte('}');
    }

    /// Wraps `formatZigType` so the option can be printed with `{f}`.
    fn fmtZigType(self: ConfigOption) std.fmt.Alt(ConfigOption, formatZigType) {
        return .{ .data = self };
    }

    /// Writes the default value as a Zig expression:
    /// - JSON arrays become `&.{a,b,...}` with each element JSON-stringified
    /// - enum options become `.member` (the default must be a JSON string)
    /// - everything else is JSON-stringified as is
    fn formatDefaultValue(config: ConfigOption, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        switch (config.default) {
            .array => |elements| {
                try writer.writeAll("&.{");
                for (elements.items, 0..) |element, position| {
                    if (position > 0) try writer.writeByte(',');
                    std.json.Stringify.value(element, .{}, writer) catch |err| return @errorCast(err);
                }
                try writer.writeByte('}');
            },
            else => if (config.@"enum" == null) {
                std.json.Stringify.value(config.default, .{}, writer) catch |err| return @errorCast(err);
            } else {
                try writer.print(".{s}", .{config.default.string});
            },
        }
    }

    /// Wraps `formatDefaultValue` so the option can be printed with `{f}`.
    fn fmtDefaultValue(self: ConfigOption) std.fmt.Alt(ConfigOption, formatDefaultValue) {
        return .{ .data = self };
    }
};

/// Container for the full list of config options handed to the generator
/// functions in this file.
/// NOTE(review): presumably parsed from `src/tools/config.json` — the loading
/// code is not visible here, confirm against the caller.
const Config = struct {
    /// Every config option, in the order they are emitted by the generators.
    options: []ConfigOption,
};

/// Root object of the JSON schema written by `generateSchemaFile`.
/// All fields except `properties` carry fixed default values.
const Schema = struct {
    @"$schema": []const u8 = "http://json-schema.org/draft-04/schema",
    title: []const u8 = "ZLS Config",
    description: []const u8 = "Configuration file for ZLS",
    type: []const u8 = "object",
    /// One entry per config option, keyed by the option name.
    properties: std.json.ArrayHashMap(SchemaEntry),
};

/// A single property of the JSON schema, describing one config option.
const SchemaEntry = struct {
    /// Copied from `ConfigOption.description`.
    description: []const u8,
    /// Type name as returned by `ConfigOption.getTypescriptType`.
    type: []const u8,
    /// Element type of an array option; `null` for every other option.
    items: ?struct { type: []const u8 } = null,
    /// Allowed values of an enum option; `null` for every other option.
    @"enum": ?[]const []const u8 = null,
    /// Copied from `ConfigOption.default`.
    default: std.json.Value,
};

/// Formatter that renders (possibly multi-line) text as a Zig comment.
const FormatDocs = struct {
    /// The text to render; lines are separated by '\n'.
    text: []const u8,
    /// Which comment marker every line is prefixed with.
    comment_kind: CommentKind,

    const CommentKind = enum {
        /// `// `
        normal,
        /// `/// `
        doc,
        /// `//! `
        top_level,
    };

    /// Writes every line of `ctx.text` prefixed with the comment marker.
    /// Lines are joined with '\n'; no trailing newline is written.
    fn render(ctx: FormatDocs, writer: *std.Io.Writer) std.Io.Writer.Error!void {
        const marker: []const u8 = switch (ctx.comment_kind) {
            .normal => "// ",
            .doc => "/// ",
            .top_level => "//! ",
        };

        var lines = std.mem.splitScalar(u8, ctx.text, '\n');
        // A split iterator always yields at least one (possibly empty) item.
        try writer.writeAll(marker);
        try writer.writeAll(lines.first());
        while (lines.next()) |line| {
            try writer.writeByte('\n');
            try writer.writeAll(marker);
            try writer.writeAll(line);
        }
    }
};

/// Returns a `{f}`-printable value that renders `text` as a comment of the
/// given kind (see `FormatDocs.render`).
fn fmtDocs(text: []const u8, comment_kind: FormatDocs.CommentKind) std.fmt.Alt(FormatDocs, FormatDocs.render) {
    const docs: FormatDocs = .{
        .text = text,
        .comment_kind = comment_kind,
    };
    return .{ .data = docs };
}

/// Generates the Zig source of the config struct fields and writes it to `path`
/// (relative to the current working directory).
///
/// The source is first assembled in memory, then parsed and re-rendered with
/// `std.zig.Ast` so that the written file is canonically formatted. The
/// generated source is asserted to be free of syntax errors.
fn generateConfigFile(allocator: std.mem.Allocator, config: Config, path: []const u8) (std.fs.Dir.WriteFileError || std.mem.Allocator.Error)!void {
    const header =
        \\//! DO NOT EDIT
        \\//! Configuration options for ZLS.
        \\//! If you want to add a config option edit
        \\//! src/tools/config.json
        \\//! GENERATED BY src/tools/config_gen.zig
        \\
    ;
    const field_template =
        \\
        \\{f}
        \\{f}: {f} = {f},
        \\
    ;
    const footer =
        \\
        \\// DO NOT EDIT
        \\
    ;

    var output: std.Io.Writer.Allocating = .init(allocator);
    defer output.deinit();
    const out = &output.writer;

    // An allocating writer can only fail because of memory exhaustion.
    out.writeAll(header) catch return error.OutOfMemory;
    for (config.options) |option| {
        const description = std.mem.trim(u8, option.description, &std.ascii.whitespace);
        const field_name = std.mem.trim(u8, option.name, &std.ascii.whitespace);
        out.print(field_template, .{
            fmtDocs(description, .doc),
            std.zig.fmtId(field_name),
            option.fmtZigType(),
            option.fmtDefaultValue(),
        }) catch return error.OutOfMemory;
    }
    out.writeAll(footer) catch return error.OutOfMemory;

    const unformatted = try output.toOwnedSliceSentinel(0);
    defer allocator.free(unformatted);

    // Round-trip through the parser to get canonical formatting.
    var ast: std.zig.Ast = try .parse(allocator, unformatted, .zig);
    defer ast.deinit(allocator);
    std.debug.assert(ast.errors.len == 0);

    const formatted = try ast.renderAlloc(allocator);
    defer allocator.free(formatted);

    try std.fs.cwd().writeFile(.{
        .sub_path = path,
        .data = formatted,
    });
}

/// Writes a JSON schema describing every config option to `path`
/// (relative to the current working directory).
///
/// Each option becomes one property keyed by the option name. Optional fields
/// of an entry that are `null` are omitted from the output.
/// Returns `error.UnsupportedType` if an option has a type that
/// `ConfigOption.getTypescriptType` does not know.
fn generateSchemaFile(allocator: std.mem.Allocator, config: Config, path: []const u8) !void {
    const schema_file = try std.fs.cwd().createFile(path, .{});
    defer schema_file.close();

    var buffer: [4096]u8 = undefined;
    var file_writer = schema_file.writer(&buffer);
    const writer = &file_writer.interface;

    var schema: Schema = .{ .properties = .{} };
    defer schema.properties.map.deinit(allocator);

    try schema.properties.map.ensureTotalCapacity(allocator, @intCast(config.options.len));

    for (config.options) |option| {
        const json_type = try option.getTypescriptType();
        schema.properties.map.putAssumeCapacityNoClobber(option.name, .{
            .description = option.description,
            .type = json_type,
            // Derived from the resolved JSON type instead of the raw Zig type
            // string so that an optional list (`?[]const []const u8`) also
            // gets its element type; the only array type is a list of strings.
            .items = if (std.mem.eql(u8, json_type, "array")) .{ .type = "string" } else null,
            .@"enum" = option.@"enum",
            .default = option.default,
        });
    }

    try std.json.Stringify.value(schema, .{
        .whitespace = .indent_4,
        .emit_null_optional_fields = false,
    }, writer);

    try writer.writeByte('\n');
    // Flushes the buffered data to the file.
    try file_writer.end();
}

/// One configuration entry of the JSON written by `generateVSCodeConfigFile`.
/// Optional fields that are `null` are omitted when the entry is serialized
/// there (`emit_null_optional_fields = false`).
const ConfigurationProperty = struct {
    /// Defaults to "resource"; the predefined entries also use "window" and "machine-overridable".
    scope: []const u8 = "resource",
    /// Type name, e.g. "boolean" or "string".
    type: []const u8,
    description: []const u8,
    /// Allowed values when the property is an enumeration.
    @"enum": ?[]const []const u8 = null,
    /// Set to "path" for properties that hold a file system path.
    format: ?[]const u8 = null,
    /// Default value; omitted from the output when `null`.
    default: ?std.json.Value = null,
};

/// Writes the ZLS configuration options in the shape expected by the
/// `package.json` of vscode-zig to `path` (relative to the current working
/// directory).
///
/// The output starts with a few predefined entries followed by one entry per
/// config option. Option names are prefixed with `zig.zls.` and converted from
/// snake_case to camelCase. `zig_exe_path` and `force_autofix` are skipped.
/// Returns `error.UnsupportedType` if an option has a type that
/// `ConfigOption.getTypescriptType` does not know.
fn generateVSCodeConfigFile(allocator: std.mem.Allocator, config: Config, path: []const u8) !void {
    const config_file = try std.fs.cwd().createFile(path, .{});
    defer config_file.close();

    // Number of entries inserted below with string literal keys. Every key
    // after them has been allocated by `snakeCaseToCamelCase` and is owned by
    // this function. This must match the number of predefined entries exactly,
    // otherwise the cleanup either leaks keys or frees string literals.
    const predefined_configurations: usize = 3;
    var configuration: std.json.ArrayHashMap(ConfigurationProperty) = .{};
    try configuration.map.ensureTotalCapacity(allocator, @intCast(predefined_configurations + config.options.len));
    defer {
        for (configuration.map.keys()[predefined_configurations..]) |name| allocator.free(name);
        configuration.map.deinit(allocator);
    }

    configuration.map.putAssumeCapacityNoClobber("zig.zls.debugLog", .{
        .scope = "resource",
        .type = "boolean",
        .description = "Enable debug logging in release builds of ZLS.",
    });
    configuration.map.putAssumeCapacityNoClobber("zig.zls.trace.server", .{
        .scope = "window",
        .type = "string",
        .@"enum" = &.{ "off", "messages", "verbose" },
        .description = "Traces the communication between VS Code and the language server.",
        .default = .{ .string = "off" },
    });
    configuration.map.putAssumeCapacityNoClobber("zig.zls.path", .{
        .scope = "machine-overridable",
        .type = "string",
        .description = "Path to `zls` executable. Example: `C:/zls/zig-cache/bin/zls.exe`. The string \"zls\" means lookup ZLS in PATH.",
        .format = "path",
    });
    // Guards against the constant going out of sync with the entries above.
    std.debug.assert(configuration.map.count() == predefined_configurations);

    for (config.options) |option| {
        if (std.mem.eql(u8, option.name, "zig_exe_path")) continue; // vscode-zig has its own option for this
        if (std.mem.eql(u8, option.name, "force_autofix")) continue; // VS Code supports code actions on save without a workaround

        const snake_case_name = try std.fmt.allocPrint(allocator, "zig.zls.{s}", .{option.name});
        defer allocator.free(snake_case_name);
        const name = try snakeCaseToCamelCase(allocator, snake_case_name);
        // Only relevant until the key has been stored in the map; afterwards
        // the deferred cleanup above is responsible for freeing it.
        errdefer allocator.free(name);

        const default: ?std.json.Value = if (std.mem.eql(u8, option.name, "enable_build_on_save"))
            // "enable_build_on_save" need to be explicitly set to 'null' so that it doesn't default to 'false'
            .null
        else if (option.default != .null)
            option.default
        else
            null;

        configuration.map.putAssumeCapacityNoClobber(name, .{
            .type = try option.getTypescriptType(),
            .description = option.description,
            .@"enum" = option.@"enum",
            .format = if (std.mem.find(u8, option.name, "path") != null) "path" else null,
            .default = default,
        });
    }

    var buffer: [4096]u8 = undefined;
    var file_writer = config_file.writer(&buffer);
    const writer = &file_writer.interface;

    try std.json.Stringify.value(configuration, .{
        .whitespace = .indent_2,
        .emit_null_optional_fields = false,
    }, writer);
    // Flushes the buffered data to the file.
    try file_writer.end();
}

/// Converts `str` from snake_case to camelCase.
///
/// Every underscore is removed. A lowercase ASCII letter that directly follows
/// an underscore is converted to uppercase; all other characters are copied
/// unchanged. The caller owns the returned slice.
fn snakeCaseToCamelCase(allocator: std.mem.Allocator, str: []const u8) error{OutOfMemory}![]u8 {
    // Underscores are the only characters that get dropped.
    const camel = try allocator.alloc(u8, str.len - std.mem.count(u8, str, "_"));

    var written: usize = 0;
    var after_underscore = false;
    for (str) |c| {
        if (c == '_') {
            after_underscore = true;
            continue;
        }
        // `toUpper` only affects lowercase ASCII letters.
        camel[written] = if (after_underscore) std.ascii.toUpper(c) else c;
        written += 1;
        after_underscore = false;
    }
    return camel;
}

/// Tokenizer for a langref.html.in file
/// example file: https://raw.githubusercontent.com/ziglang/zig/master/doc/langref.html.in
/// this is a modified version from https://github.com/ziglang/zig/blob/master/doc/docgen.zig
///
/// The input is split into plain content and tags of the form
/// `{#name|argument|...#}`. Tokens only store offsets into `buffer`.
const Tokenizer = struct {
    /// The complete input text.
    buffer: []const u8,
    /// Offset of the next byte to be examined.
    index: usize = 0,
    /// State carried over between calls to `next`.
    state: State = .Start,

    const State = enum {
        /// Outside of a tag.
        Start,
        /// A `{` has been seen outside of a tag.
        LBracket,
        /// A `#` has been seen inside of a tag.
        Hash,
        /// Inside of a tag (after `{#`).
        TagName,
        /// The end of the buffer has been reached.
        Eof,
    };

    const Token = struct {
        id: Id,
        /// Offset of the first byte of the token.
        start: usize,
        /// Offset one past the last byte of the token.
        end: usize,

        const Id = enum {
            /// The buffer ended while inside of a tag.
            Invalid,
            /// Text outside of a tag.
            Content,
            /// `{#`
            BracketOpen,
            /// Text inside of a tag.
            TagContent,
            /// `|` inside of a tag.
            Separator,
            /// `#}`
            BracketClose,
            /// End of the buffer.
            Eof,
        };
    };

    /// Returns the next token and advances the tokenizer past it.
    /// `result.id` stays `.Eof` until the first byte of a token has been
    /// classified, which is how the branches below tell "a token is already
    /// pending" apart from "this byte starts a new token".
    fn next(self: *Tokenizer) Token {
        var result: Token = .{
            .id = .Eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                .Start => switch (c) {
                    '{' => {
                        self.state = .LBracket;
                    },
                    else => {
                        result.id = .Content;
                    },
                },
                .LBracket => switch (c) {
                    '#' => {
                        if (result.id != .Eof) {
                            // A content token is pending: end it before the `{`
                            // and let the next call scan the `{#` again.
                            self.index -= 1;
                            self.state = .Start;
                            break;
                        } else {
                            result.id = .BracketOpen;
                            self.index += 1;
                            self.state = .TagName;
                            break;
                        }
                    },
                    else => {
                        // The `{` did not start a tag, it is part of the content.
                        result.id = .Content;
                        self.state = .Start;
                    },
                },
                .TagName => switch (c) {
                    '|' => {
                        if (result.id != .Eof) {
                            // End the pending tag content before the `|`.
                            break;
                        } else {
                            result.id = .Separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = .Hash;
                    },
                    else => {
                        result.id = .TagContent;
                    },
                },
                .Hash => switch (c) {
                    '}' => {
                        if (result.id != .Eof) {
                            // End the pending tag content before the `#` and
                            // let the next call scan the `#}` again.
                            self.index -= 1;
                            self.state = .TagName;
                            break;
                        } else {
                            result.id = .BracketClose;
                            self.index += 1;
                            self.state = .Start;
                            break;
                        }
                    },
                    else => {
                        // The `#` did not close the tag, it is part of the tag content.
                        result.id = .TagContent;
                        self.state = .TagName;
                    },
                },
                .Eof => unreachable,
            }
        } else {
            // Only reached when the buffer is exhausted, not when the loop is
            // left with `break`.
            switch (self.state) {
                .Start,
                .LBracket,
                .Eof,
                => {},
                else => {
                    result.id = .Invalid;
                },
            }
            self.state = .Eof;
        }
        result.end = self.index;
        return result;
    }
};

/// A builtin function extracted from the langref by `collectBuiltinData`.
const Builtin = struct {
    /// Title of the section of the builtin. A slice into the langref text, not owned.
    name: []const u8,
    /// Content of the first syntax tag after the section header, copied with
    /// the allocator that was passed to `collectBuiltinData`.
    /// It is the string literal "" until that syntax tag has been found.
    signature: [:0]const u8,
    /// Accumulated documentation text of the builtin (poorly formatted html
    /// mixed with markdown code blocks and links).
    documentation: std.ArrayList(u8),
};

/// Parses a `langref.html.in` file and extracts builtins from this section: `https://ziglang.org/documentation/master/#Builtin-Functions`
/// The documentation field contains poorly formatted html.
///
/// `version` is only used to build the documentation URL of `{#link#}` tags.
///
/// The caller owns the returned slice. Of every builtin:
/// - `name` is a slice into `langref_file`
/// - `signature` has been allocated with `allocator`, unless no syntax tag
///   followed the section header, in which case it is the string literal ""
/// - `documentation` has been allocated with `allocator`
///
/// The langref is trusted input: unexpected tokens trigger assertions.
fn collectBuiltinData(allocator: std.mem.Allocator, version: []const u8, langref_file: []const u8) error{OutOfMemory}![]Builtin {
    var tokenizer: Tokenizer = .{ .buffer = langref_file };

    const State = enum {
        /// searching for this line:
        /// {#header_open|Builtin Functions|2col#}
        searching,
        /// skipping builtin functions description:
        /// Builtin functions are provided by the compiler and are prefixed ...
        prefix,
        /// every entry begins with this:
        /// {#syntax#}@addrSpaceCast(comptime addrspace: std.builtin.AddressSpace, ptr: anytype) anytype{#endsyntax#}
        builtin_begin,
        /// iterate over documentation
        builtin_content,
    };
    var state: State = .searching;

    // Signature of a builtin whose syntax tag has not been seen yet. It is a
    // string literal and must never be passed to `allocator.free`: freeing a
    // sentinel-terminated slice touches the sentinel byte even if `len == 0`.
    const unset_signature: [:0]const u8 = "";

    var builtins: std.ArrayList(Builtin) = .empty;
    errdefer {
        for (builtins.items) |*builtin| {
            if (builtin.signature.ptr != unset_signature.ptr) allocator.free(builtin.signature);
            builtin.documentation.deinit(allocator);
        }
        builtins.deinit(allocator);
    }

    // Nesting level of headers, counted from the "Builtin Functions" header.
    // Only read once that header has been found.
    var depth: u32 = 0;
    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            .Content => {
                switch (state) {
                    .builtin_content => {
                        try builtins.items[builtins.items.len - 1].documentation.appendSlice(allocator, tokenizer.buffer[token.start..token.end]);
                    },
                    else => continue,
                }
            },
            .BracketOpen => {
                const tag_token = tokenizer.next();
                std.debug.assert(tag_token.id == .TagContent);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (std.mem.eql(u8, tag_name, "header_open")) {
                    std.debug.assert(tokenizer.next().id == .Separator);
                    const content_token = tokenizer.next();
                    std.debug.assert(content_token.id == .TagContent);
                    const content_name = tokenizer.buffer[content_token.start..content_token.end];

                    switch (state) {
                        .searching => {
                            if (std.mem.eql(u8, content_name, "Builtin Functions")) {
                                state = .prefix;
                                depth = 0;
                            }
                        },
                        .prefix, .builtin_begin => {
                            state = .builtin_begin;
                            try builtins.append(allocator, .{
                                .name = content_name,
                                .signature = unset_signature,
                                .documentation = .empty,
                            });
                        },
                        .builtin_content => unreachable,
                    }
                    if (state != .searching) {
                        depth += 1;
                    }

                    // skip the remaining arguments of the tag
                    while (true) {
                        const bracket_tok = tokenizer.next();
                        switch (bracket_tok.id) {
                            .BracketClose => break,
                            .Separator, .TagContent => continue,
                            else => unreachable,
                        }
                    }
                } else if (std.mem.eql(u8, tag_name, "header_close")) {
                    std.debug.assert(tokenizer.next().id == .BracketClose);

                    if (state == .builtin_content) {
                        state = .builtin_begin;
                    }
                    if (state != .searching) {
                        depth -= 1;
                        // the "Builtin Functions" section has been closed
                        if (depth == 0) break;
                    }
                } else if (state != .searching and std.mem.eql(u8, tag_name, "syntax")) {
                    std.debug.assert(tokenizer.next().id == .BracketClose);
                    const content_tag = tokenizer.next();
                    std.debug.assert(content_tag.id == .Content);
                    const content_name = tokenizer.buffer[content_tag.start..content_tag.end];
                    std.debug.assert(tokenizer.next().id == .BracketOpen);
                    const end_syntax_tag = tokenizer.next();
                    std.debug.assert(end_syntax_tag.id == .TagContent);
                    const end_tag_name = tokenizer.buffer[end_syntax_tag.start..end_syntax_tag.end];
                    std.debug.assert(std.mem.eql(u8, end_tag_name, "endsyntax"));
                    std.debug.assert(tokenizer.next().id == .BracketClose);

                    switch (state) {
                        .builtin_begin => {
                            // the first syntax tag of an entry is its signature
                            const last_builtin = &builtins.items[builtins.items.len - 1];
                            const signature = try allocator.dupeZ(u8, content_name);
                            // do not leak a signature that has been set before
                            if (last_builtin.signature.ptr != unset_signature.ptr) allocator.free(last_builtin.signature);
                            last_builtin.signature = signature;
                            state = .builtin_content;
                        },
                        .builtin_content => {
                            const documentation = &builtins.items[builtins.items.len - 1].documentation;

                            var aw: std.Io.Writer.Allocating = .fromArrayList(allocator, documentation);
                            defer aw.deinit();
                            writeMarkdownCode(content_name, "zig", &aw.writer) catch return error.OutOfMemory;
                            documentation.* = aw.toArrayList();
                        },
                        else => {},
                    }
                } else if (state != .searching and std.mem.eql(u8, tag_name, "syntax_block")) {
                    std.debug.assert(tokenizer.next().id == .Separator);

                    const source_type_tag = tokenizer.next();
                    std.debug.assert(source_type_tag.id == .TagContent);
                    const source_type = tokenizer.buffer[source_type_tag.start..source_type_tag.end];
                    switch (tokenizer.next().id) {
                        .Separator => {
                            std.debug.assert(tokenizer.next().id == .TagContent);
                            std.debug.assert(tokenizer.next().id == .BracketClose);
                        },
                        .BracketClose => {},
                        else => unreachable,
                    }

                    const content_token = tokenizer.next();
                    std.debug.assert(content_token.id == .Content);
                    const content = tokenizer.buffer[content_token.start..content_token.end];
                    const documentation = &builtins.items[builtins.items.len - 1].documentation;

                    var aw: std.Io.Writer.Allocating = .fromArrayList(allocator, documentation);
                    defer aw.deinit();
                    writeMarkdownCode(content, source_type, &aw.writer) catch return error.OutOfMemory;
                    documentation.* = aw.toArrayList();

                    std.debug.assert(tokenizer.next().id == .BracketOpen);
                    const end_code_token = tokenizer.next();
                    std.debug.assert(end_code_token.id == .TagContent);
                    const end_code_name = tokenizer.buffer[end_code_token.start..end_code_token.end];
                    std.debug.assert(std.mem.eql(u8, end_code_name, "end_syntax_block"));
                    std.debug.assert(tokenizer.next().id == .BracketClose);
                } else if (state != .searching and std.mem.eql(u8, tag_name, "link")) {
                    std.debug.assert(tokenizer.next().id == .Separator);
                    const name_token = tokenizer.next();
                    std.debug.assert(name_token.id == .TagContent);
                    const name = tokenizer.buffer[name_token.start..name_token.end];

                    // the link target is optional and defaults to the link text
                    const url_name = switch (tokenizer.next().id) {
                        .Separator => blk: {
                            const url_name_token = tokenizer.next();
                            std.debug.assert(url_name_token.id == .TagContent);
                            const url_name = tokenizer.buffer[url_name_token.start..url_name_token.end];
                            std.debug.assert(tokenizer.next().id == .BracketClose);
                            break :blk url_name;
                        },
                        .BracketClose => name,
                        else => unreachable,
                    };

                    const spaceless_url_name = try std.mem.replaceOwned(u8, allocator, url_name, " ", "-");
                    defer allocator.free(spaceless_url_name);

                    const documentation = &builtins.items[builtins.items.len - 1].documentation;
                    try documentation.print(allocator, "[{s}](https://ziglang.org/documentation/{s}/#{s})", .{
                        name,
                        version,
                        std.mem.trimStart(u8, spaceless_url_name, "@"),
                    });
                } else if (state != .searching and std.mem.eql(u8, tag_name, "code_begin")) {
                    // skip the one to three arguments of the tag
                    std.debug.assert(tokenizer.next().id == .Separator);
                    std.debug.assert(tokenizer.next().id == .TagContent);
                    switch (tokenizer.next().id) {
                        .Separator => {
                            std.debug.assert(tokenizer.next().id == .TagContent);
                            switch (tokenizer.next().id) {
                                .Separator => {
                                    std.debug.assert(tokenizer.next().id == .TagContent);
                                    std.debug.assert(tokenizer.next().id == .BracketClose);
                                },
                                .BracketClose => {},
                                else => unreachable,
                            }
                        },
                        .BracketClose => {},
                        else => unreachable,
                    }

                    // Only the content directly in front of `code_end` is
                    // emitted; content in front of any other tag is dropped.
                    while (true) {
                        const content_token = tokenizer.next();
                        std.debug.assert(content_token.id == .Content);
                        const content = tokenizer.buffer[content_token.start..content_token.end];
                        std.debug.assert(tokenizer.next().id == .BracketOpen);
                        const end_code_token = tokenizer.next();
                        std.debug.assert(end_code_token.id == .TagContent);
                        const end_tag_name = tokenizer.buffer[end_code_token.start..end_code_token.end];

                        if (std.mem.eql(u8, end_tag_name, "code_end")) {
                            std.debug.assert(tokenizer.next().id == .BracketClose);

                            const documentation = &builtins.items[builtins.items.len - 1].documentation;

                            var aw: std.Io.Writer.Allocating = .fromArrayList(allocator, documentation);
                            defer aw.deinit();
                            writeMarkdownCode(content, "zig", &aw.writer) catch return error.OutOfMemory;
                            documentation.* = aw.toArrayList();
                            break;
                        }
                        std.debug.assert(tokenizer.next().id == .BracketClose);
                    }
                } else {
                    // skip every other tag
                    while (true) {
                        switch (tokenizer.next().id) {
                            .Eof => unreachable,
                            .BracketClose => break,
                            else => continue,
                        }
                    }
                }
            },
            else => unreachable,
        }
    }

    return try builtins.toOwnedSlice(allocator);
}

/// Writes `content` as markdown code after trimming spaces and newlines from
/// both ends.
///
/// Content without a newline is written as an inline code span. Content with
/// newlines is written as a fenced code block tagged with `source_type`, with
/// a newline in front of the opening fence and none after the closing fence.
/// Empty lines of the content are omitted from the code block.
fn writeMarkdownCode(content: []const u8, source_type: []const u8, writer: *std.Io.Writer) std.Io.Writer.Error!void {
    const code = std.mem.trim(u8, content, " \n");

    if (std.mem.findScalar(u8, code, '\n') == null) {
        try writer.writeByte('`');
        try writer.writeAll(code);
        try writer.writeByte('`');
        return;
    }

    try writer.writeAll("\n```");
    try writer.writeAll(source_type);
    var lines = std.mem.tokenizeScalar(u8, code, '\n');
    while (lines.next()) |line| {
        try writer.writeByte('\n');
        try writer.writeAll(line);
    }
    try writer.writeAll("\n```");
}

/// Writes `str` without surrounding whitespace, followed by a newline.
/// Nothing is written if `str` consists only of whitespace.
///
/// If `single_line` is set, every line of `str` is trimmed and followed by a
/// single space so that the text ends up on one line.
fn writeLine(str: []const u8, single_line: bool, writer: *std.Io.Writer) std.Io.Writer.Error!void {
    const text = std.mem.trim(u8, str, &std.ascii.whitespace);
    if (text.len == 0) return;

    if (!single_line) {
        try writer.writeAll(text);
    } else {
        var lines = std.mem.splitScalar(u8, text, '\n');
        while (lines.next()) |line| {
            try writer.writeAll(std.mem.trim(u8, line, &std.ascii.whitespace));
            try writer.writeByte(' ');
        }
    }

    try writer.writeByte('\n');
}

/// Converts text that contains html tags into markdown and writes the result
/// to `writer`.
/// The following tags are supported:
/// - `<p>`
/// - `<pre>`
/// - `<em>`
/// - `<ul>` and `<li>`
/// - `<a>`
/// - `<code>`
fn writeMarkdownFromHtml(html: []const u8, writer: *std.Io.Writer) !void {
    // The conversion starts outside of any list and keeps line breaks.
    const keep_line_breaks = false;
    const list_depth: u32 = 0;
    try writeMarkdownFromHtmlInternal(html, keep_line_breaks, list_depth, writer);
}

/// Converts `html` into markdown and writes the result to `writer`.
/// This is kind of a hacky solution. A cleaner solution would be to implement using a xml/html parser.
///
/// - `single_line`: text between tags is joined into a single line
/// - `depth`: nesting level of lists, used to indent list items
///
/// A `<` that does not start one of the supported tags is kept as literal
/// text. A tag without a closing tag extends to the end of `html`.
///
/// Errors: `error.InvalidTag` if the `>` of an opening tag is missing,
/// `error.MissingEndTag` if a nested tag of the same kind is never closed,
/// plus any error of `writer`.
fn writeMarkdownFromHtmlInternal(html: []const u8, single_line: bool, depth: u32, writer: *std.Io.Writer) !void {
    const tags: []const []const u8 = &.{ "pre", "p", "em", "ul", "li", "a", "code" };
    const opening_tags: []const []const u8 = &.{ "<pre>", "<p>", "<em>", "<ul>", "<li>", "<a>", "<code>" };
    const closing_tags: []const []const u8 = &.{ "</pre>", "</p>", "</em>", "</ul>", "</li>", "</a>", "</code>" };

    // Start of the text that has not been written yet.
    var index: usize = 0;
    // Where the search for the next `<` starts. This is kept separate from
    // `index` so that skipping an unsupported `<` does not discard the text
    // in front of it.
    var search_index: usize = 0;
    while (std.mem.findScalarPos(u8, html, search_index, '<')) |tag_start_index| {
        const tag_index = for (tags, 0..) |tag_name, i| {
            if (std.mem.startsWith(u8, html[tag_start_index + 1 ..], tag_name)) break i;
        } else {
            // Not a supported tag: treat the `<` as text and continue behind it.
            search_index = tag_start_index + 1;
            continue;
        };

        try writeLine(html[index..tag_start_index], single_line, writer);

        const tag_name = tags[tag_index];
        const opening_tag_name = opening_tags[tag_index];
        const closing_tag_name = closing_tags[tag_index];

        const content_start = 1 + (std.mem.findScalarPos(u8, html, tag_start_index + 1 + tag_name.len, '>') orelse return error.InvalidTag);

        // Find the matching closing tag.
        var scan_index = content_start;
        const content_end = while (std.mem.findScalarPos(u8, html, scan_index, '<')) |end| {
            if (std.mem.startsWith(u8, html[end..], closing_tag_name)) break end;
            if (std.mem.startsWith(u8, html[end..], opening_tag_name)) {
                // Skip a nested tag of the same kind together with its closing tag.
                scan_index = std.mem.findPos(u8, html, end + opening_tag_name.len, closing_tag_name) orelse return error.MissingEndTag;
                scan_index += closing_tag_name.len;
                continue;
            }
            // Some other tag: continue directly behind its `<`.
            scan_index = end + 1;
        } else html.len;

        const content = html[content_start..content_end];
        index = @min(html.len, content_end + closing_tag_name.len);
        search_index = index;

        if (std.mem.eql(u8, tag_name, "p")) {
            try writeMarkdownFromHtmlInternal(content, true, depth, writer);
            try writer.writeByte('\n');
        } else if (std.mem.eql(u8, tag_name, "pre")) {
            try writeMarkdownFromHtmlInternal(content, false, depth, writer);
        } else if (std.mem.eql(u8, tag_name, "em")) {
            try writer.print("**{s}** ", .{content});
        } else if (std.mem.eql(u8, tag_name, "ul")) {
            try writeMarkdownFromHtmlInternal(content, false, depth + 1, writer);
        } else if (std.mem.eql(u8, tag_name, "li")) {
            try writer.splatByteAll(' ', 1 + (depth -| 1) * 2);
            try writer.writeAll("- ");
            try writeMarkdownFromHtmlInternal(content, true, depth, writer);
        } else if (std.mem.eql(u8, tag_name, "a")) {
            // Everything between `<a` and `>` is expected to be a single href attribute.
            const href_part = std.mem.trimStart(u8, html[tag_start_index + 2 .. content_start - 1], " ");
            std.debug.assert(std.mem.startsWith(u8, href_part, "href=\""));
            std.debug.assert(href_part[href_part.len - 1] == '"');
            const url = href_part["href=\"".len .. href_part.len - 1];
            try writer.print("[{s}]({s})", .{ content, std.mem.trimStart(u8, url, "@") });
        } else if (std.mem.eql(u8, tag_name, "code")) {
            try writeMarkdownCode(content, "zig", writer);
        } else return error.UnsupportedTag;
    }

    try writeLine(html[index..], single_line, writer);
}

/// A single parameter of a function signature, as extracted by
/// `extractParametersAndReturnTypeFromSignature`.
/// Both fields are slices into the signature that was passed to that function.
const Parameter = struct {
    /// The complete text of the parameter without surrounding whitespace,
    /// e.g. `comptime DestType: type`.
    signature: []const u8,
    /// The text after the colon without surrounding whitespace, or `null` if
    /// the parameter contains no colon.
    type: ?[]const u8,
};

/// takes in a signature (without name or leading parenthesis) like this:
/// `comptime DestType: type, integer: anytype) DestType`
/// and outputs its parameters and return type:
/// `comptime DestType: type`, `integer: anytype`, `DestType`
fn extractParametersAndReturnTypeFromSignature(allocator: std.mem.Allocator, signature: [:0]const u8) error{OutOfMemory}!struct { []Parameter, []const u8 } {
    var parameters: std.ArrayList(Parameter) = .empty;
    defer parameters.deinit(allocator);

    var tokenizer: std.zig.Tokenizer = .init(signature);
    var argument_start: ?usize = null;
    var colon_index: ?usize = null;
    while (true) {
        const token = tokenizer.next();
        switch (token.tag) {
            .eof => unreachable,
            .l_paren => {
                var paren_depth: usize = 1;
                while (paren_depth > 0) {
                    switch (tokenizer.next().tag) {
                        .l_paren => paren_depth += 1,
                        .r_paren => paren_depth -= 1,
                        else => {},
                    }
                }
                continue;
            },
            .colon => {
                std.debug.assert(argument_start != null);
                std.debug.assert(colon_index == null);
                colon_index = token.loc.start;
            },
            .comma, .r_paren => |tag| {
                if (argument_start) |start| {
                    try parameters.append(allocator, .{
                        .signature = std.mem.trim(u8, signature[start..token.loc.start], &std.ascii.whitespace),
                        .type = if (colon_index) |i| std.mem.trim(u8, signature[1 + i .. token.loc.start], &std.ascii.whitespace) else null,
                    });
                }
                argument_start = null;
                colon_index = null;
                if (tag == .r_paren) break;
            },
            .doc_comment, .container_doc_comment => {},
            else => {
                if (argument_start == null) {
                    argument_start = token.loc.start;
                    std.debug.assert(colon_index == null);
                }
            },
        }
    }

    const return_type = signature[tokenizer.index + 1 ..];
    return .{ try parameters.toOwnedSlice(allocator), return_type };
}

/// Builds a snippet string for calling `builtin_name`: the name followed by a
/// parenthesized, comma separated list of placeholders of the form `${N:signature}`,
/// where `N` counts the parameters starting at 1.
///
/// The returned string is allocated with `allocator` and owned by the caller.
fn createSignatureSnippet(
    allocator: std.mem.Allocator,
    builtin_name: []const u8,
    parameters: []const Parameter,
) error{OutOfMemory}![]const u8 {
    var buffer: std.ArrayList(u8) = .empty;
    defer buffer.deinit(allocator);

    try buffer.appendSlice(allocator, builtin_name);
    try buffer.append(allocator, '(');
    for (parameters, 0..) |parameter, index| {
        // Every placeholder except the first is preceded by a separator.
        const separator: []const u8 = if (index == 0) "" else ", ";
        try buffer.print(allocator, "{s}${{{d}:{s}}}", .{ separator, index + 1, parameter.signature });
    }
    try buffer.append(allocator, ')');

    return buffer.toOwnedSlice(allocator);
}

/// Returns `type_str` without a leading `std.builtin.`; strings that do not
/// start with that prefix are returned unchanged. The result is a slice of `type_str`.
fn withoutStdBuiltinPrefix(type_str: []const u8) []const u8 {
    const prefix = "std.builtin.";
    const has_prefix = std.mem.startsWith(u8, type_str, prefix);
    return if (has_prefix) type_str[prefix.len..] else type_str;
}

/// Generates data files from the Zig language Reference (https://ziglang.org/documentation/master/)
/// Output example: https://github.com/zigtools/zls/blob/0.11.0/src/data/master.zig
///
/// Reads the file at `langref_path` (at most 16 MiB), passes it together with `version`
/// to `collectBuiltinData` and writes a Zig source file to `output_path` that declares a
/// `Builtin` struct and a `builtins` string map with one entry per collected builtin.
/// Both paths are resolved relative to the current working directory.
/// `allocator` is only used for temporary allocations; everything is freed before returning.
fn generateVersionDataFile(allocator: std.mem.Allocator, version: []const u8, output_path: []const u8, langref_path: []const u8) !void {
    const langref_source = try std.fs.cwd().readFileAlloc(langref_path, allocator, .limited(16 * 1024 * 1024));
    defer allocator.free(langref_source);

    const builtins = try collectBuiltinData(allocator, version, langref_source);
    defer {
        for (builtins) |*builtin| {
            allocator.free(builtin.signature);
            builtin.documentation.deinit(allocator);
        }
        allocator.free(builtins);
    }

    var builtin_file = try std.fs.cwd().createFile(output_path, .{});
    defer builtin_file.close();

    var buffer: [4096]u8 = undefined;
    var file_writer = builtin_file.writer(&buffer);
    const writer = &file_writer.interface;

    // File header: type declarations and the opening of the `builtins` map.
    try writer.writeAll(
        \\//! DO NOT EDIT
        \\//! GENERATED BY src/tools/config_gen.zig
        \\
        \\const std = @import("std");
        \\
        \\pub const Builtin = struct {
        \\    signature: []const u8,
        \\    return_type: []const u8,
        \\    snippet: []const u8,
        \\    documentation: []const u8,
        \\    parameters: []const Parameter,
        \\
        \\    pub const Parameter = struct {
        \\        signature: []const u8,
        \\        type: ?[]const u8,
        \\    };
        \\};
        \\
        \\pub const builtins: std.StaticStringMap(Builtin) = .initComptime(&.{
        \\
    );

    for (builtins) |builtin| {
        // Skip the builtin's name and the opening parenthesis; the helper expects the
        // signature to start at the first parameter.
        const parameters, const return_type = try extractParametersAndReturnTypeFromSignature(allocator, builtin.signature[builtin.name.len + 1 ..]);
        defer allocator.free(parameters);

        const snippet = try createSignatureSnippet(allocator, builtin.name, parameters);
        defer allocator.free(snippet);

        try writer.print(
            \\    .{{
            \\        "{f}",
            \\        Builtin{{
            \\            .signature = "{f}",
            \\            .return_type = "{f}",
            \\            .snippet = "{f}",
            \\
        , .{
            std.zig.fmtString(builtin.name),
            std.zig.fmtString(builtin.signature),
            std.zig.fmtString(withoutStdBuiltinPrefix(return_type)),
            std.zig.fmtString(snippet),
        });

        // NOTE(review): this presumably relies on the collected documentation
        // starting with a `</pre>` tag, which is skipped here; confirm against
        // `collectBuiltinData`.
        const html = builtin.documentation.items["</pre>".len..];
        var markdown: std.Io.Writer.Allocating = .init(allocator);
        defer markdown.deinit();
        writeMarkdownFromHtml(html, &markdown.writer) catch |err| {
            // A failed write into the in-memory writer is reported as running out of
            // memory. Any other failure of the HTML conversion is propagated unchanged
            // instead of being mislabelled as `OutOfMemory`.
            if (err == error.WriteFailed) return error.OutOfMemory;
            return err;
        };

        // Emit the documentation as a multiline string literal, one `\\` line per
        // markdown line, without trailing spaces.
        try writer.writeAll("            .documentation =\n");
        var line_it = std.mem.splitScalar(u8, std.mem.trim(u8, markdown.written(), "\n"), '\n');
        while (line_it.next()) |line| {
            try writer.print("            \\\\{s}\n", .{std.mem.trimEnd(u8, line, " ")});
        }

        // The opening brace is intentionally not followed by a newline so that an
        // empty parameter list can be closed on the same line.
        try writer.writeAll(
            \\            ,
            \\            .parameters = &[_]Builtin.Parameter{
        );

        if (parameters.len != 0) {
            try writer.writeByte('\n');
            for (parameters) |param| {
                try writer.print(
                    \\                .{{
                    \\                    .signature = "{f}",
                    \\
                , .{
                    std.zig.fmtString(param.signature),
                });
                if (param.type) |t| {
                    try writer.print("                    .type = \"{f}\",\n", .{
                        std.zig.fmtString(withoutStdBuiltinPrefix(t)),
                    });
                } else {
                    try writer.writeAll("                    .type = null,\n");
                }
                try writer.writeAll("                },\n");
            }
            try writer.writeAll("            },\n");
        } else {
            try writer.writeAll("},\n");
        }

        try writer.writeAll(
            \\        },
            \\    },
            \\
        );
    }

    try writer.writeAll(
        \\});
        \\
        \\// DO NOT EDIT
        \\
    );
    try file_writer.end();
}

/// Entry point: parses the command line and runs every generator that was requested.
///
/// Each `--generate-*` option takes the path of the file to write. `--langref-path`
/// and `--langref-version` provide the inputs required by `--generate-version-data`.
/// `--help` prints the usage text and exits. Unrecognized arguments, options without
/// a value and an invalid `--langref-version` terminate via `std.process.fatal`.
pub fn main() !void {
    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const gpa = debug_allocator.allocator();

    var args_it = try std.process.argsWithAllocator(gpa);
    defer args_it.deinit();

    // The first argument is the path of the executable itself.
    _ = args_it.skip();

    var config_path: ?[]const u8 = null;
    var schema_path: ?[]const u8 = null;
    var vscode_config_path: ?[]const u8 = null;
    var version_data_path: ?[]const u8 = null;
    var langref_path: ?[]const u8 = null;
    var langref_version: ?[]const u8 = null;

    while (args_it.next()) |argname| {
        if (std.mem.eql(u8, argname, "--help")) {
            try std.fs.File.stdout().writeAll(
                \\Usage: zig build gen -- [command]
                \\
                \\Commands:
                \\  --help                           Prints this message
                \\  --generate-vscode-config [path]  Output zls-vscode configurations
                \\  --generate-config [path]         Output path to config file (see src/Config.zig)
                \\  --generate-schema [path]         Output json schema file (see schema.json)
                \\  --generate-version-data [path]   Output data file
                \\  --langref-path [path]            Input langref.html.in file path
                \\  --langref-version [version]      Input langref.html.in version
                \\
            );
            return std.process.cleanExit();
        } else if (std.mem.eql(u8, argname, "--generate-config")) {
            config_path = args_it.next() orelse {
                std.process.fatal("Expected output path after --generate-config argument.\n", .{});
            };
        } else if (std.mem.eql(u8, argname, "--generate-schema")) {
            schema_path = args_it.next() orelse {
                std.process.fatal("Expected output path after --generate-schema argument.\n", .{});
            };
        } else if (std.mem.eql(u8, argname, "--generate-vscode-config")) {
            vscode_config_path = args_it.next() orelse {
                std.process.fatal("Expected output path after --generate-vscode-config argument.\n", .{});
            };
        } else if (std.mem.eql(u8, argname, "--generate-version-data")) {
            version_data_path = args_it.next() orelse {
                std.process.fatal("Expected output path after --generate-version-data argument.\n", .{});
            };
        } else if (std.mem.eql(u8, argname, "--langref-path")) {
            // This is the only path option that names an input file.
            langref_path = args_it.next() orelse {
                std.process.fatal("Expected input path after --langref-path argument.\n", .{});
            };
        } else if (std.mem.eql(u8, argname, "--langref-version")) {
            langref_version = args_it.next() orelse {
                std.process.fatal("Expected version after --langref-version argument.\n", .{});
            };
            // Accept either the literal `master` or anything that parses as a semantic version.
            const is_valid_version = blk: {
                if (std.mem.eql(u8, langref_version.?, "master")) break :blk true;
                _ = std.SemanticVersion.parse(langref_version.?) catch break :blk false;
                break :blk true;
            };
            if (!is_valid_version) {
                std.process.fatal("'{s}' is not a valid argument after --langref-version.\n", .{langref_version.?});
            }
        } else {
            std.process.fatal("Unrecognized argument '{s}'.\n", .{argname});
        }
    }

    // The option definitions are embedded at compile time from `config.json`.
    const config_json = try std.json.parseFromSlice(Config, gpa, @embedFile("config.json"), .{});
    defer config_json.deinit();
    const config = config_json.value;

    if (config_path) |output_path| {
        try generateConfigFile(gpa, config, output_path);
    }
    if (schema_path) |output_path| {
        try generateSchemaFile(gpa, config, output_path);
    }
    if (vscode_config_path) |output_path| {
        try generateVSCodeConfigFile(gpa, config, output_path);
        // The hint must name the option exactly as it is parsed above.
        try std.fs.File.stderr().writeAll(
            \\Changing configuration options may also require editing the `package.json` from ziglang/vscode-zig at https://github.com/ziglang/vscode-zig/blob/master/package.json
            \\You can use `zig build gen -- --generate-vscode-config /path/to/output/file.json` to generate the new configuration properties which you can then copy into `package.json`
            \\
        );
    }
    if (version_data_path) |output_path| {
        try generateVersionDataFile(
            gpa,
            langref_version orelse std.process.fatal("--generate-version-data requires --langref-version to be specified", .{}),
            output_path,
            langref_path orelse std.process.fatal("--generate-version-data requires --langref-path to be specified", .{}),
        );
    }
}
